pandas DataFrame filling choices:

1. append
2. update
3. set_value

This notebook compares the three methods using the `timeit.timeit` function.


In [1]:
import pandas as pd
import datetime
import timeit


C:\Users\TianYuSimon\Anaconda3\lib\site-packages\pandas\computation\__init__.py:19: UserWarning: The installed version of numexpr 2.4.4 is not supported in pandas and will be not be used

  UserWarning)

In [7]:
# Use append
# Build the index with pd.date_range: the DatetimeIndex(start=, end=, freq=)
# constructor form is not available in current pandas releases.
pd_index = pd.date_range(
    start=datetime.datetime(2016, 10, 1),
    end=datetime.datetime(2016, 10, 1),
    freq='1D',
)
# Empty target frame that the benchmark grows row by row.
df = pd.DataFrame(columns=['data'])
# Single-row frame (one timestamp, value 1231) appended on every iteration.
df_1 = pd.DataFrame(data={'data': 1231}, index=pd_index)

def wrapper(func, *args, **kwargs):
    """Bind ``func`` to the given arguments and return a zero-argument callable.

    Calling the returned object runs ``func(*args, **kwargs)`` and passes its
    result back, which lets a parameterised function be handed to
    ``timeit.timeit`` as the statement to time.
    """
    return lambda: func(*args, **kwargs)

def app(df,df1):
    """Grow ``df`` by appending ``df1`` to it 100 times, one append per iteration.

    Parameters
    ----------
    df : pandas.DataFrame
        Starting frame; it is not modified (each append builds a new frame).
    df1 : pandas.DataFrame
        Frame whose rows are appended on every iteration.

    Returns
    -------
    pandas.DataFrame
        The frame after 100 appends.
    """
    for _ in range(100):
        # Use the ``df1`` argument rather than a module-level frame, so the
        # function times exactly what it is given.  ``pd.concat`` is the
        # replacement for ``DataFrame.append``, which current pandas no longer
        # provides.
        df = pd.concat([df, df1])
    return df
        

# Bind the two frames to the append loop, then time 100 runs of it.
wrapped = wrapper(app, df, df_1)
timeit.timeit(stmt=wrapped, number=100)


Out[7]:
15.897420258961006

In [8]:
# Use set value
# One timestamp per minute from 2016-10-01 00:00 to 2016-10-02 00:00 inclusive.
# Built with pd.date_range because the DatetimeIndex(start=, end=, freq=)
# constructor form is not available in current pandas releases; 'min' is the
# current alias for the minute frequency (the older 'T' alias is deprecated).
pd_index = pd.date_range(
    start=datetime.datetime(2016, 10, 1),
    end=datetime.datetime(2016, 10, 2),
    freq='1min',
)
# Pre-allocated frame: every row exists up front and starts out empty (NaN).
df = pd.DataFrame(columns=['data'], index=pd_index)

def wrapper(func, *args, **kwargs):
    """Bind ``func`` to the given arguments and return a zero-argument callable.

    Calling the returned object runs ``func(*args, **kwargs)`` and passes its
    result back, which lets a parameterised function be handed to
    ``timeit.timeit`` as the statement to time.
    """
    return lambda: func(*args, **kwargs)

def app(df):
    """Write 1231 into the ``'data'`` column of the first 100 rows of ``df``.

    The frame is modified in place, one scalar assignment per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Pre-allocated frame with a ``'data'`` column.  If it has fewer than
        100 rows, only the rows that exist are written.
    """
    # Take the labels from the frame itself rather than from a module-level
    # index, so the function works on whatever frame it is given.
    for label in df.index[:100]:
        # ``.at`` is the scalar setter that replaces ``DataFrame.set_value``,
        # which current pandas no longer provides.
        df.at[label, 'data'] = 1231
# Bind the pre-allocated frame to the scalar-set loop, then time 100 runs of it.
wrapped = wrapper(app, df)
timeit.timeit(stmt=wrapped, number=100)


Out[8]:
0.21142389073108347

In [9]:
# Use update value
# One timestamp per minute from 2016-10-01 00:00 to 2016-10-02 00:00 inclusive.
# Built with pd.date_range because the DatetimeIndex(start=, end=, freq=)
# constructor form is not available in current pandas releases; 'min' is the
# current alias for the minute frequency (the older 'T' alias is deprecated).
pd_index = pd.date_range(
    start=datetime.datetime(2016, 10, 1),
    end=datetime.datetime(2016, 10, 2),
    freq='1min',
)
# Pre-allocated frame: every row exists up front and starts out empty (NaN).
df = pd.DataFrame(columns=['data'], index=pd_index)

def wrapper(func, *args, **kwargs):
    """Bind ``func`` to the given arguments and return a zero-argument callable.

    Calling the returned object runs ``func(*args, **kwargs)`` and passes its
    result back, which lets a parameterised function be handed to
    ``timeit.timeit`` as the statement to time.
    """
    return lambda: func(*args, **kwargs)

def app(df):
    """Fill the first 100 rows of ``df`` in place, one ``DataFrame.update`` per row.

    Each iteration builds a one-row frame holding 1231 in ``'data'`` and merges
    it into ``df`` with ``update``, so the work per iteration is a single-row
    write, like the other filling benchmarks in this notebook.

    Parameters
    ----------
    df : pandas.DataFrame
        Pre-allocated frame with a ``'data'`` column.  If it has fewer than
        100 rows, the extra iterations update nothing.
    """
    for i in range(100):
        # Slice exactly one label (i .. i+1).  A ``[:i]`` slice would rewrite
        # every earlier row on each pass and never reach row 99.  The labels
        # come from the frame itself, not from a module-level index.
        df_1 = pd.DataFrame(data={'data': 1231}, index=df.index[i:i + 1])
        df.update(df_1)
        
# Bind the pre-allocated frame to the update loop, then time 100 runs of it.
wrapped = wrapper(app, df)
timeit.timeit(stmt=wrapped, number=100)


Out[9]:
15.572011684825526

In [ ]: